package org.dlsu.mt.preprocessing;
import java.io.FileInputStream;

/**
 * Extracts the plain text of a Word document by walking its sections,
 * paragraphs and character runs.
 *
 * NOTE(review): HWPFDocument, Range, Section, Paragraph and CharacterRun are
 * not imported in the visible part of this file -- presumably they come from
 * Apache POI's HWPF module; confirm the imports/classpath.
 */
public class TestDocParsing
{
	/**
	 * Creates a parser. The class holds no state, so there is nothing to
	 * initialise.
	 */
	public TestDocParsing()
	{
		
	}
	
	/**
	 * Reads the document stored at the given path and returns its text.
	 * The text of every character run is concatenated in document order and
	 * a "\n" is appended after each paragraph.
	 *
	 * @param filename path of the document file to read
	 * @return the extracted text, or null if any exception occurs while
	 *         opening or parsing the file
	 */
	public String parseWordDocument(String filename)
	{
		FileInputStream input = null;
		try
		{
			input = new FileInputStream(filename);
			HWPFDocument doc = new HWPFDocument(input);
			return extractText(doc.getRange());
		}
		catch(Exception e)
		{
			// Callers rely on null as the failure signal, so keep that contract.
			return null;
		}
		finally
		{
			// Always release the file handle; the original never closed it.
			if (input != null)
			{
				try
				{
					input.close();
				}
				catch(java.io.IOException ignored)
				{
					// A failed close must not change the result that was
					// already determined above.
				}
			}
		}
	}
	
	/**
	 * Concatenates the text of all character runs in the range, ending each
	 * paragraph with a "\n".
	 */
	private String extractText(Range range)
	{
		// Local, single-threaded accumulation: no synchronised buffer needed.
		StringBuilder text = new StringBuilder();
		for (int x = 0; x < range.numSections(); x++)
		{
			Section section = range.getSection(x);
			for (int y = 0; y < section.numParagraphs(); y++)
			{
				Paragraph paragraph = section.getParagraph(y);
				for (int z = 0; z < paragraph.numCharacterRuns(); z++)
				{
					CharacterRun run = paragraph.getCharacterRun(z);
					text.append(run.text());
				}
				// use a new line at the paragraph break
				text.append("\n");
			}
		}
		return text.toString();
	}
	
}